---
title: "Replication materials for Gov 2001 Final Paper: Votes on Fire"
author: "Maria Ballesteros, Aleksandra Conevska, Ethan Miles"
date: "December 14, 2020"
output: 
  pdf_document:
    keep_tex: yes
---

This document replicates the analysis of "Votes on Fire".  



## Data Preparation

```{r setup, echo = F}

## Load packages ----
pacman::p_load(sf,
               data.table, 
               lwgeom,
               RSQLite,
               MASS,
               car,
               tidyverse,
               lubridate,
               raster,
               rgis, #remotes::install_github("Pakillo/rgis")
               nngeo,
               rgdal,
               units,
               knitr, 
               readstata13,
               plm,
               dplyr,
               reshape2,
               biglm,
               lfe,
               haven,
               ggplot2,
               rgdal,
               spData,
               spDataLarge,
               tmap,
               tmaptools,
               leaflet, #most popular java script mapping library in the world - Google maps, etc
               estimatr,
               stargazer,
               styler, #highlight a code chunk and press CNTRL+SHIFT+A for formating 
               tidycensus, 
               lmtest)

#devtools::install_version("velox", version = "0.2.0")
#devtools::install_github("Pakillo/rgis")

## Set working directory
#Aleksandra
#setwd('/Users/aleksandraconevska/Dropbox/Harvard/G1/2020/Courses/Gov-2001/Replication-Paper/Replication/data/processed/VOTING/')

#María
setwd("/Users/mariaballesteros/Dropbox/Harvard/G1/GOV 2001/Replication/data/processed/VOTING/")

# Ethan 
#setwd("/Users/ethan/Dropbox (Harvard University)/Replication/data/processed/VOTING")

## Load in the data 
votefire_1620 <-
  read.csv("./prectotal_offic_1620.csv")

reg_v<- read.csv("./reg_indv_voters.csv")

```

### Fixing Data from OK county

```{r}
################### THIS CHUNK COULD GO IN THE DATA MERGE COMPLETE SCRIPT #########
#### getting OK county data from individual data as there appears to be a data entry error. 

reg_v$to2020<- reg_v$voted2020/reg_v$totalreg
## replacing values for OK county with values from Indv level data
  ## first, droping FullPrc.y and renaming FullPrc.x to FullPrc
votefire_1620<- votefire_1620 %>%
                    dplyr::select(-c("FullPrc.y", "countycode")) %>%
                    rename(FullPrc = FullPrc.x)
reg_v<- reg_v %>% 
                dplyr::select(c("FullPrc", "totalreg", "voted2020", "to2020"))
                  
## generating dataset with OK replaced
votefire_2<- left_join(votefire_1620, reg_v, by = "FullPrc", all.x = T)
attach(votefire_2)

for( i in 1:nrow(votefire_2)){
if(CountyName[i] == "OK"){
votefire_2$ballotscast2020[i]<-  votefire_2$voted2020[i]
votefire_2$regvoters2020[i]<-    votefire_2$totalreg[i]
votefire_2$turnout2020[i]<-      votefire_2$to2020[i]
}
}



ggplot(votefire_2)+ 
  geom_point(aes(x = PrecDist5000Fire_km, y = turnout2020), color = "orangered3", alpha = .4) +
  xlim(0,300) +
  ggtitle("Precinct Turnout Percentage by Distance from Nearest Fire (KM)") + 
  xlab("Distance from Nearest Fire (KM)") + 
  ylab("Precinct Turnout Percentage") +
  geom_smooth(method = "lm", aes(x = PrecDist5000Fire_km, y = turnout2020), color = "black") +
  theme_bw() + 
  theme(plot.title = element_text(hjust = 0.5)) 

#ggsave("TurnoutByFireDistance.jpeg", path = "/Users/ethan/Dropbox (Harvard University)/Replication/output/Figures")


## dropping 2 rogue OK precincts and extra variables

votefire_2<- votefire_2 %>%
                        filter(turnout2020<= 1) %>%
                        dplyr::select(-c("totalreg", "voted2020", "to2020"))


### ## creating proportion demographic variables
votefire_2$prop_fem<- votefire_2$fem/votefire_2$regvoters2020
votefire_2$prop_under30<- votefire_2$age18_30/votefire_2$regvoters2020


## creating turnout variables by demographic category
votefire_2$fem_turnout <- votefire_2$fem_voted/votefire_2$fem
votefire_2$male_turnout <- votefire_2$male_voted/votefire_2$male
votefire_2$young_turnout <- votefire_2$young_voted/votefire_2$age18_30

## selecting complete cases
delvars <- delvars <- c('turnout2016', 'turnout2020', 'BAWithin0to5km',
             'WHPMean', 'crit_work', 'remindex', 'proximity',
             'exposure', 'urban', 'FIPS')
votefire2_complete <- votefire_2[complete.cases(votefire_2[ , delvars]),]
FullPrc<-votefire2_complete$FullPrc
```



##### Analysis #####


## Table 1 
```{r}

# Creating lengths for cluster robust se 
G <- length(unique(votefire2_complete$CountyCode)) 
N <- length(votefire2_complete$CountyCode)

# model 1 - binary iv, no turnout trend, controls, no fixed effects
m1 <-  lm(
        turnout2020 ~ BAWithin0to5km  + WHPMean + crit_work + 
        remindex + proximity + exposure + urban + pop, 
        data = votefire2_complete) 

dfa1 <- (G/(G - 1)) * (N - 1)/m1$df.residual#computing df adjustment for clustered SE 
firm_c_vcov1 <- dfa1 * vcovHC(m1, type = "HC0", cluster = "group", adjust = T) # display with cluster VCE and df-adjustment
m1_r <- coeftest(m1, vcov = firm_c_vcov1)# run models with clustered SE



# model 2 - binary iv, turnout trend, controls, no fixed effects
m2 <-  lm(
       turnout2020 ~ BAWithin0to5km  + WHPMean + turnout2016 +
       crit_work + remindex + proximity + exposure + urban + pop, 
       data = votefire2_complete) 

dfa2 <- (G/(G - 1)) * (N - 1)/m2$df.residual#computing df adjustment for clustered SE 
firm_c_vcov2 <- dfa2 * vcovHC(m2, type = "HC0", cluster = "group", adjust = T) # display with cluster VCE and df-adjustment
m2_r <- coeftest(m2, vcov = firm_c_vcov2)# run models with clustered SE


# model 3 - binary iv, turnout trend, no controls,  fixed effects

# m3_rob = lm_robust(
#  turnout2020 ~ BAWithin0to5km  + WHPMean + turnout2016,
 # fixed_effects = ~ CountyCode,
#  cluster = CountyCode,
#  se_type = "CR0",
#  data = votefire2_complete
#)

m3 <- lm(turnout2020 ~ BAWithin0to5km  + WHPMean + turnout2016 + factor(CountyCode),
          data = votefire2_complete)
dfa3 <- (G/(G - 1)) * (N - 1)/m3$df.residual #computing df adjustment for clustered SE 
firm_c_vcov3 <- dfa3 * vcovHC(m3, type = "HC0", cluster = "group", adjust = T) # display with cluster VCE and df-adjustment
m3_r <- coeftest(m3, vcov = firm_c_vcov3)# run models with clustered SE


## PLM m3 <- plm(turnout2020 ~ BAWithin0to5km  + WHPMean + turnout2016,
#          data = votefire2_complete, 
#          model = 'within', 
#          index = c('CountyCode'))


dfa3 <- (G/(G - 1)) * (N - 1)/m3$df.residual #computing df adjustment for clustered SE 
firm_c_vcov3 <- dfa3 * vcovHC(m3, type = "HC0", cluster = "group", adjust = T) # display with cluster VCE and df-adjustment
m3_r <- coeftest(m3, vcov = firm_c_vcov3)# run models with clustered SE


# model 4 -  continuous iv, no turnout trend, covid controls,  no fixed effects 
m4 <-  lm(
  turnout2020 ~ PrecDist5000Fire_km  + WHPMean + crit_work +
    remindex + proximity + exposure + urban + pop,
  data = votefire2_complete
) 

PrecDist5000Fire_km_new

dfa4 <- (G/(G - 1)) * (N - 1)/m4$df.residual#computing df adjustment for clustered SE 
firm_c_vcov4 <- dfa4 * vcovHC(m4, type = "HC0", cluster = "group", adjust = T) # display with cluster VCE and df-adjustment
m4_r <- coeftest(m4, vcov = firm_c_vcov4)# run models with clustered SE





# model 5 - continuous iv, turnout trend, covid controls, no fixed effects
m5 <-  lm(
  turnout2020 ~ PrecDist5000Fire_km  + WHPMean + turnout2016 +
    crit_work + remindex + proximity + exposure + urban + pop,
  data = votefire2_complete
)

dfa5 <- (G/(G - 1)) * (N - 1)/m5$df.residual#computing df adjustment for clustered SE 
firm_c_vcov5 <- dfa5 * vcovHC(m5, type = "HC0", cluster = "group", adjust = T) # display with cluster VCE and df-adjustment
m5_r <- coeftest(m5, vcov = firm_c_vcov5)# run models with clustered SE




# model 6 -  continuous iv, turnout trend, no covid controls,  fixed effects


#m6_rob = lm_robust(
 # turnout2020 ~ PrecDist5000Fire_km  + WHPMean + turnout2016,
#  fixed_effects = ~ CountyCode,
#  cluster = CountyCode,
#  se_type = "CR0",
#  data = votefire2_complete
#)

# THIS WORKS INSTEAD OF LM_ROBUST. PRODUCES THE SAME SE AND COEFFS ALMOST EXACTLY. MIGHT WANT TO JUST USE THIS INSTEAD OF TRYING TO GET LM_ROBUST TO WORK
# IN STARGAZER. IF EVERYONE AGREES WE CAN JUST QUICKLY CHANGE THE CODE 

## PLM m6 <- plm(turnout2020 ~ PrecDist5000Fire_km  + WHPMean + turnout2016, data = votefire2_complete, model = 'within', index = c('CountyCode'))

m6 <- lm(turnout2020 ~ PrecDist5000Fire_km  + WHPMean + turnout2016 + factor(CountyCode), data = votefire2_complete)

dfa6 <- (G/(G - 1)) * (N - 1)/m6$df.residual #computing df adjustment for clustered SE 
firm_c_vcov6 <- dfa6 * vcovHC(m6, type = "HC0", cluster = "group", adjust = T) # display with cluster VCE and df-adjustment
m6_r <- coeftest(m6, vcov = firm_c_vcov6)# run models with clustered SE


#### on how to add lm_robust objects to stargazer: https://declaredesign.org/r/estimatr/articles/regression-tables.html


```


## Stargazer tables 
```{r results="asis"}
## 

stargazer(m1, m2, m3, m4, m5, m6,
           se = starprep(m1, m2, m3, m4, m5, m6, 
                         clusters = votefire2_complete$FullPrc, se_type = "stata"),
          header = FALSE,
          df=FALSE, 
          style = "apsr",
          title = "Main result with different measure of fire exposure",
          omit = c("WHPMean", "crit_work", "remindex", "proximity",
                   "exposure", "urban", "pop", "turnout2020", "Constant", "CountyCode"),
          add.lines = list(c("County FE", "", "", "X", "", "", "X"), 
                           c("Controls", "X", "X", "", "X", "X", "")),
          covariate.labels = c("Fire within 0-5km", "Distance to nearest fire"),
          dep.var.caption = "Turnout 2020 (mean = ; sd = 0.",
          keep.stat = c("rsq", "n"),
          digits = 3)



```



## Testing turnout change
```{r results = "asis"}

#Creating vote change variable 
votefire_2$turnout_change <- votefire_2$turnout2020 - votefire_2$turnout2016
summary(votefire_2$turnout_change)

#running without fixed effects 
m7 <-  lm(
  turnout_change ~ PrecDist5000Fire_km  + WHPMean + crit_work +
    remindex + proximity + exposure + urban + pop,
  data = votefire_2
) 

dfa7 <- (G/(G - 1)) * (N - 1)/m7$df.residual#computing df adjustment for clustered SE 
firm_c_vcov7 <- dfa7 * vcovHC(m7, type = "HC0", cluster = "group", adjust = T) # display with cluster VCE and df-adjustment
m7_r <- coeftest(m7, vcov = firm_c_vcov7)# run models with clustered SE


#running with fixed effects
m8 = lm_robust(
  turnout_change ~ PrecDist5000Fire_km  + WHPMean,
  fixed_effects = ~ CountyCode,
  cluster = CountyCode,
  se_type = "CR0",
  data = votefire_2
)


## Fixed effects, in lm form (vs lm_robust) 
m8 <-  lm(
  turnout_change ~ PrecDist5000Fire_km  + WHPMean + crit_work +
    remindex + proximity + exposure + urban + pop + factor(CountyCode),
  data = votefire_2
) 


dfa8 <- (G/(G - 1)) * (N - 1)/m8$df.residual#computing df adjustment for clustered SE 
firm_c_vcov8 <- dfa8 * vcovHC(m8, type = "HC0", cluster = "group", adjust = T) # display with cluster VCE and df-adjustment
m8_r <- coeftest(m8, vcov = firm_c_vcov8)# run models with clustered SE


## is this the correct setup for SEs?
stargazer(m7, m8,
           se = starprep(m7, m8, 
                         clusters = votefire2_complete$FullPrc, se_type = "stata"),
          header = FALSE,
          df=FALSE, 
          style = "apsr",
          title = "Turnout Change",
          omit = c("WHPMean", "crit_work", "remindex", "proximity",
                   "exposure", "urban", "pop", "turnout2020", "Constant", "CountyCode"),
          add.lines = list(c("County FE", "", "X"), 
                           c("Controls", "X", "X")),
          dep.var.caption = "Turnout 2020 (mean = ; sd = 0.",
          keep.stat = c("rsq", "n"),
          digits = 3)


```

## creating model with demographic controls
````{r, results = "asis"}


### first, creating percent female and percent age


m4 <-  lm(
        turnout2020 ~ PrecDist5000Fire_km  + WHPMean + crit_work + 
        remindex + proximity + exposure + urban + pop, 
        data = votefire2_complete)

c1<-  lm(
  turnout2020 ~ PrecDist5000Fire_km  + WHPMean + crit_work +
    remindex + proximity + exposure + urban + pop + prop_under30,
  data = votefire2_complete
) 


c2<-  lm(
  turnout2020 ~ PrecDist5000Fire_km  + WHPMean + crit_work +
    remindex + proximity + exposure + urban + pop + prop_fem,
  data = votefire2_complete
) 


c3<-  lm(
  turnout2020 ~ PrecDist5000Fire_km  + WHPMean + crit_work +
    remindex + proximity + exposure + urban + pop + prop_under30*prop_fem,
  data = votefire2_complete
) 


stargazer(
  m4, c1, c2, c3,
  se = starprep(m4, c1, c2, c3, 
                clusters = votefire2_complete$FullPrc, se_type = "stata"), 
          header = FALSE,
          df=FALSE, 
          style = "apsr",
          title = "Main result with different measure of fire exposure",
          omit = c("WHPMean", "crit_work", "remindex", "proximity",
                   "exposure", "urban", "pop", "turnout2020", "Constant"),
          column.labels = c("Baseline", "Baseline w Gender", "Baseline w young", "Baseline w age \ gender"),
          covariate.labels = c( "Distance to nearest fire"),
          dep.var.caption = "Turnout 2020 (mean = ; sd = 0.",
          keep.stat = c("rsq", "n"),
          digits = 3
)
#### Removing cases where age is missing #only 2#
delvars <- delvars <- c("fem_turnout", "young_turnout", "male_turnout")
             
votefire_dem <- votefire2_complete[complete.cases(votefire2_complete[ , delvars]),]

M4 <-  lm(
        turnout2020 ~ PrecDist5000Fire_km  + WHPMean + crit_work + 
        remindex + proximity + exposure + urban + pop, 
        data = votefire_dem)
c4 <-  lm(
        fem_turnout ~ PrecDist5000Fire_km  + WHPMean + crit_work + 
        remindex + proximity + exposure + urban + pop + prop_fem, 
        data = votefire_dem)
c5<-  lm(
  male_turnout ~ PrecDist5000Fire_km  + WHPMean + crit_work + prop_fem +
    remindex + proximity + exposure + urban + pop,
  data = votefire_dem
) 


c6<-  lm(
  young_turnout ~ PrecDist5000Fire_km  + WHPMean + crit_work +
    remindex + proximity + exposure + urban + pop + prop_under30,
  data = votefire_dem
) 

stargazer(
  M4, c4, c5, c6,
  se = starprep(M4, c4, c5, c6, 
                clusters = votefire_dem$FullPrc, se_type = "stata"), 
          header = FALSE,
          df=FALSE, 
          style = "apsr",
          title = "Main result with different measure of fire exposure",
          omit = c("WHPMean", "crit_work", "remindex", "proximity",
                   "exposure", "urban", "pop", "turnout2020", "Constant"),
          column.labels = c("Baseline", "Turnout: Female Voters", "Turnout:Male Voters", "Turnout: age 30 and under"),
          covariate.labels = c( "Distance to nearest fire"),
          dep.var.caption = "Turnout 2020 (mean = ; sd = 0.",
          keep.stat = c("rsq", "n"),
          digits = 3
)


````
## Testing if WHP predicts fires 
```{r}

WHP_test <- plm(BASqMSum ~ WHPMean, data = votefire2_complete, model = 'within', index = c('CountyCode'))

dfatest <- (G/(G - 1)) * (N - 1)/WHP_test$df.residual#computing df adjustment for clustered SE 
firm_c_vcovtest <- dfatest * vcovHC(WHP_test, type = "HC0", cluster = "group", adjust = T) # display with cluster VCE and df-adjustment
WHPtest_r <- coeftest(WHP_test, vcov = firm_c_vcovtest)# run models with clustered SE


stargazer(
  WHPtest_r,
          header = FALSE,
          df=FALSE, 
          style = "apsr",
          title = "Testing relationship between fire vulnerability and burned area",
          omit = c("Constant"),
          column.labels = c("Burned Area (Sq m)"),
          covariate.labels = c("Wildfire Hazard Potential"),
          dep.var.caption = "",
          keep.stat = c("rsq", "n"),
          digits = 3
)
```

## Figure X: Map of Wildfire 
```{r, echo = F}
## Setting wd and path

#Aleksa
# firePath <- paste0('/Users/aleksandraconevska/Dropbox/Harvard/G1/2020/Courses/Gov-2001/Replication-Paper/Replication/data/processed/BA_PRECINCT_INT')


#María
firePath<- paste0("/Users/mariaballesteros/Dropbox/Harvard/G1/GOV 2001/Replication/data/processed/BA_PRECINCT_INT")

# Ethan
#firePath <- paste0('/Users/ethan/Dropbox (Harvard University)/Replication/data/processed/BA_PRECINCT_INT')


firepath <- read_sf(firePath)


## Loading appropriate data format 
my_spdf <- readOGR( 
  dsn = firePath, #dataset name - just pointing to the shape file 
  layer ="wash_ba_af_2018_int", #layer - thats how these files are structured, it creates a connection to the file which is a database
                                #but uses this to get the data that we need from the database       
  verbose =FALSE
)


my_spdf$BAPrec[is.na(my_spdf$BAPrec)] <- 0
my_spdf$BAPrcAc[is.na(my_spdf$BAPrcAc)] <- 0

## Creating BA  measure in km 
my_spdf$BAPrec_km <- my_spdf$BAPrec/1000


## Create map using tmap - meters
washfire_map_m <-  tm_shape(my_spdf) +
  tm_fill(col = 'BAPrec')


## Create map using tmap - km
washfire_map_km <-  tm_shape(my_spdf) +
  tm_fill(col = 'BAPrec_km')

#Aleksa setwd("~/Dropbox/Harvard/2020/Fall2020/Gov-2001/Replication-Paper/Replication/output/")
#Maria
setwd("/Users/mariaballesteros/Dropbox/Harvard/G1/GOV 2001/Replication/output/")

# Ethan
#setwd("/Users/ethan/Dropbox (Harvard University)/Replication/output/")

#tmap_save(tm = washfire_map_km, 
       #   filename = "washfire_map_km", 
         # width = 3200, height = 2540, units = "px", dpi = 350)

## Create map using tmap - acres
washfire_map_ac <-  tm_shape(my_spdf) +
  tm_fill(col = 'BAPrcAc')
washfire_map_ac

tmap_save(tm = washfire_map_ac, 
          filename = "Figures/washfire_map_ac", 
          width = 3200, height = 2540, units = "px", dpi = 350)

```


## Figure X: Main results with different measures of fire exposure 
```{r}
### Ethan  ### this plot needs confidence intervals
distlist.granular = seq(from=5, to=40, by=5)

f.reltomedian = as.formula("turnout2020~BAWithin0to5km + BAWithin5to10km + 
        BAWithin10to15km + BAWithin15to20km + BAWithin20to25km +
        BAWithin25to30km + BAWithin30to35km + BAWithinOver40km +
        turnout2020 +  WHPMean ")


lmr.reltomedian = lm(
                   formula = f.reltomedian, 
                   data=votefire_2)

coef.reltomedian = summary(lmr.reltomedian)$coef


# Regresion table to accompany figure - could go in Appendix 
rownames(coef.reltomedian)= 
  c("Fire within 0-5km","Fire within 5-10km", "Fire within 10-15km",
    "Fire within 15-20km","Fire within 20-25km","Fire within 25-30km",
    "Fire within 30-35km", "Fire over 40km away", 
    "Voter turnout 2016", "Fire Vulnerability")

xtable::xtable(coef.reltomedian[1:10,1:4], digits = 3)


# Prepare for plotting 
whichdistances = distlist.granular[1:8]
numdistances = length(whichdistances)
coefs.intervals = coef.reltomedian[1:numdistances,1]
ses.intervals = coef.reltomedian[1:numdistances,2]

plot.dat = data.frame(coefs.intervals, ses.intervals, ub=coefs.intervals+2.58*ses.intervals, 
                      lb = coefs.intervals-2.58*ses.intervals, dist=whichdistances)

g = ggplot(plot.dat, aes(y=coefs.intervals, x=dist, color = I("orangered3"), alpha = .4)) +
    geom_point() + 
    theme_bw() + 
    geom_errorbar(aes(ymin=lb,ymax=ub), color = "black", width=0.1) + 
    labs(x="Distance from nearest wildfire (km)") + 
    scale_y_continuous(name="Estimated effect of wildfire on turnout", limits=c(-1, 1)) + 
    geom_hline(yintercept = 0) +
    geom_pointrange(aes(ymin=lb, ymax=ub)) + 
    theme(legend.position = "none")



g + scale_x_continuous(breaks=seq(10,40,10), 
                       labels=c("5-10 km","15-20 km","25-30 km","30-35 km"))

#ggsave("Effect_Of_Fire_On_Turnout.jpeg", path = "/Users/ethan/Dropbox (Harvard University)/Replication/output/Figures")


#dev.off()
#ggsave(filename = "fig_dist_reltomedian.jpg", plot = last_plot(), 
       #device = "jpeg", path = "figures",
      # width = 6, height = 4, units = "in")
```

##Running additional models and robustness checks
````{r}
## Set working directory
#Aleksandra
#setwd( '/Users/aleksandraconevska/Dropbox/Harvard/2020/Fall2020/Gov-2001/Replication-Paper/Replication/data/processed/VOTING/')

#María

##### seeing how result change OK original vs Ok replaced
attach(votefire_1620)

## This looks sus? idk why there are so many < 1
plot(x = PrecDist5000Fire_km, y = turnout2020)



### our standard errors are much larger for the clustered model, so i'll try transformations

############ getting rid of precincts with turnout higher than one to check results

# model t1-  continuous iv, no turnout trend, covid controls,  no fixed effects w/o precincts with turnout greater than 1
test<- votefire_1620 %>% filter(turnout2020 <=1)

ggplot(test)+ 
  geom_point(aes(x = PrecDist5000Fire_km, y = turnout2020), color = "orangered3", alpha = .4) +
  xlim(0,300) +
  ggtitle("Precinct Turnout Percentage by Distance from Nearest Fire (KM)") + 
  xlab("Distance from Nearest Fire (KM)") + 
  ylab("Precinct Turnout Percentage") +
  geom_smooth(method = "lm", aes(x = PrecDist5000Fire_km, y = turnout2020), color = "black") +
  theme_bw() + 
  theme(plot.title = element_text(hjust = 0.5)) 

#ggsave("TurnoutByFireDistance_Test.jpeg", path = "/Users/ethan/Dropbox (Harvard University)/Replication/output/Figures")

t1 <-  lm(
  turnout2020 ~ PrecDist5000Fire_km  + WHPMean + crit_work +
    remindex + proximity + exposure + urban + pop,
  data = test
) 

dfat1 <- (G/(G - 1)) * (N - 1)/t1$df.residual#computing df adjustment for clustered SE 
firm_c_vcovt1 <- dfat1 * vcovHC(t1, type = "HC0", cluster = "group", adjust = T) # display with cluster VCE and df-adjustment
t1_r <- coeftest(t1, vcov = firm_c_vcovt1)# run models with clustered SE
#


                       

### trying models with new values of OK
# model t1-  continuous iv, no turnout trend, covid controls,  no fixed effects w/o precincts with turnout greater than 1

t2 <-  lm(
  turnout2020 ~ PrecDist5000Fire_km  + WHPMean + crit_work +
    remindex + proximity + exposure + urban,
  data = votefire_2
) 

dfat2 <- (G/(G - 1)) * (N - 1)/t2$df.residual#computing df adjustment for clustered SE 
firm_c_vcovt2 <- dfat2 * vcovHC(t2, type = "HC0", cluster = "group", adjust = T) # display with cluster VCE and df-adjustment
t2_r <- coeftest(t2, vcov = firm_c_vcovt2)# run models with clustered SE


## Turnout 



# QQ plot

qplot(sample = turnout2020, na.rm = TRUE, color = I("orangered3"), alpha = .4) +
      ylab("Voter Turnout 2020") + 
      xlab("Theoretical Quantiles") + 
      ggtitle("Normal Q-Q Plot, Voter Turnout 2020 ") + 
      geom_abline(intercept = max(turnout2020, na.rm = TRUE)/2, slope = max(turnout2020, na.rm = TRUE)/8) +
      theme_bw() + 
      theme(legend.position = "none") + 
      theme(plot.title = element_text(hjust = 0.5)) 

ggsave("QQ-turn.jpeg", path = "/Users/aleksandraconevska/Dropbox/Harvard/G1/2020/Courses/Gov-2001/Replication-Paper/Replication/output/Figures")

setwd('/Users/aleksandraconevska/Dropbox/Harvard/G1/2020/Courses/Gov-2001/Replication-Paper/

#ggsave("QQ.jpeg", path = "/Users/ethan/Dropbox (Harvard University)/Replication/output/Figures")


## RESIDUALS PLOT

ggplot(m4) +
  aes(m4$model$PrecDist5000Fire_km, m4$residuals, color = I("orangered3"), alpha = .4) +
  geom_point() +
  geom_hline(aes(yintercept = 0, color ="black")) +
  xlab("Predicted Precinct Distance from Nearest Fire (KM)") +
  ylab("Residuals") +
  scale_y_continuous(limits = c(-.9, .3), breaks = c(-.9, -.6, -.3, 0, .3)) +
  theme_bw() + 
  theme(legend.position = "none")
  
#ggsave("Residuals.jpg", path = "/Users/ethan/Dropbox (Harvard University)/Replication/output/Figures")



## Trying distance measure transformations  
#votefire2_complete$PrecDist5000Fireksq <- sqrt(PrecDist5000Fire_km)

#votefire2_complete$PrecDist5000Fireksq2 <- PrecDist5000Fire_km^2

#attach(votefire2_complete)

#is.na(votefire2_complete$PrecDist5000Fire_km) <- votefire2_complete$PrecDist5000Fire_km == "0"
#is.na(votefire2_complete$PrecDist5000Fire_km) <- votefire2_complete$PrecDist5000Fire_km < "1"


#m <- lm(turnout2020 ~ PrecDist5000Fire_km, data = votefire2_complete) 
  
#MASS::boxcox(PrecDist5000Fire_km, lambda = seq(-0.25, 0.75, by = 0.05), plotit = TRUE)
#MASS::boxcox(turnout2020 ~ PrecDist5000Fire_km)

#car::boxCox(PrecDist5000Fire_km, lambda = seq(-0.25, 0.5, 1/10), plotit = TRUE)


#distance <- votefire2_complete %>%
                   # dplyr::select(c("PrecDist5000Fire_km")) 

#MASS::boxcox(, lambda = "auto")



## RESIDUALS AND QQ PLOT POST TRANSFORM

#m4t <-  lm(
# turnout2020 ~ PrecDist5000Fire_km  + WHPMean + crit_work +
 #   remindex + proximity + exposure + urban + pop,
 # data = votefire2_complete
#) 

## (ethan) -- I still need to get these to work

#Res plot
#ggplot(m4t) +
#  aes(m4t$model$PrecDist5000Fireksq, m4t$residuals, color = I("orangered3"), alpha = .4) +
# geom_point() +
#  geom_hline(aes(yintercept = 0, color ="black")) +
#  xlab("Predicted Precinct Distance from Nearest Fire (KM)") +
# ylab("Residuals") +
#  scale_y_continuous(limits = c(-.9, .3), breaks = c(-.9, -.6, -.3, 0, .3)) +
 # theme_bw() + 
 # theme(legend.position = "none")

ggsave("Transformed_Residuals.jpeg", path = "/Users/ethan/Dropbox (Harvard University)/Replication/output/Figures")


#QQ plot
#qplot(
#  sample = PrecDist5000Fireksq2,
#  na.rm = TRUE,
#  color = I("orangered3"),
##  alpha = .4
#) +
#  ylab("Precinct Distance from Fire, KM") +
#  xlab("Theoretical Quantiles") +
 # ggtitle("Normal Q-Q Plot, Precinct District from Fire (KM) ") +
# geom_abline(
 #   intercept = max(PrecDist5000Fireksq2, na.rm = TRUE) / 2,
 #   slope = max(PrecDist5000Fireksq2, na.rm = TRUE) / 8
 # ) +
#  theme_bw() +
#  theme(legend.position = "none") +
 # theme(plot.title = element_text(hjust = 0.5)) 

#ggsave("Transformed_QQ.jpeg", path = "/Users/ethan/Dropbox (Harvard University)/Replication/output/Figures")


`````

### Why is our coefficient flipping? Different versions of our baseline model

````{r, results = 'asis'}

### first version no clusters no controls
mt1 <-  lm( turnout2020 ~ PrecDist5000Fire_km  + WHPMean,
        data = votefire2_complete) 

se1 <- coef(summary(mt1))[,2]

### no controls clusters
mt2 <-  lm( turnout2020 ~ PrecDist5000Fire_km  + WHPMean,
        data = votefire2_complete) 

se2 <- starprep(mt2, clusters = votefire2_complete$FullPrc, se_type = "stata")

### clusters and covid controls

mt3 <-  lm( turnout2020 ~ PrecDist5000Fire_km  + WHPMean + crit_work +
    remindex + proximity + exposure,
        data = votefire2_complete) 

se3 <- starprep(mt3, clusters = votefire2_complete$FullPrc, se_type = "stata")

### clusters and urban controls

mt4 <-  lm( turnout2020 ~ PrecDist5000Fire_km  + WHPMean + pop + urban,
        data = votefire2_complete) 

se4 <- starprep(mt4, clusters = votefire2_complete$FullPrc, se_type = "stata")

## clusters, urban controls and covid controls. 

mt5 <-  lm( turnout2020 ~ PrecDist5000Fire_km  + WHPMean + pop + urban+ crit_work +
    remindex + proximity + exposure,
        data = votefire2_complete) 

se5 <- starprep(mt5, clusters = votefire2_complete$FullPrc, se_type = "stata")

## clusters, trend controls

mt6 <-  lm( turnout2020 ~ PrecDist5000Fire_km  + WHPMean + turnout2016,
        data = votefire2_complete) 

se6 <- starprep(mt6, clusters = votefire2_complete$FullPrc, se_type = "stata")

## clusters, urban controls, trend controls,  and covid controls. 

mt7 <-  lm( turnout2020 ~ PrecDist5000Fire_km  + WHPMean + pop + urban+ crit_work +
    remindex + proximity + exposure + turnout2016,
        data = votefire2_complete) 

se7 <- starprep(mt7, clusters = votefire2_complete$FullPrc, se_type = "stata")

## clusters and fixed effects

mt8 <-  lm( turnout2020 ~ PrecDist5000Fire_km  + WHPMean  + factor(CountyCd) + turnout2016,
        data = votefire2_complete) 

se8 <- starprep(mt8, clusters = votefire2_complete$FullPrc, se_type = "stata")

## no clusters and fixed effects

mt9 <-  lm( turnout2020 ~ PrecDist5000Fire_km  + WHPMean  +  factor(CountyCd) + turnout2016,
        data = votefire2_complete) 

se9 <- coef(summary(mt9))[,2]

stargazer(mt1,mt2, mt3, mt4, mt5, mt6, mt7, mt8, mt9, 
           se = c(se1, se2, se3, se4, se5, se6, se7, se8, se9),
          header = FALSE,
          df=FALSE, 
          style = "apsr",
          title = "Main result with different measure of fire exposure",
          keep = c("Intercept", "PrecDist5000Fire_km", "urban", "pop", "crit_work",
                    "remindex" , "proximity" , "exposure" ),
          add.lines = list(c("County FE", "", "", "", "", "", "", "", "X", "X"), 
                           c("Covid Controls", "", "", "X", "", "X", "", "X", "", ""), 
                           c("Trend Controls", "", "", "", "", "", "X", "X", "", ""), 
                           c("Urban Controls", "", "", "", "X", "X", "", "X", "", ""), 
                           c("Clusters", "", "X", "X", "X", "X", "X", "X", "X", "")),
          covariate.labels = c("Distance to nearest fire"),
          dep.var.caption = "Turnout 2020 (mean = ; sd = 0.",
          keep.stat = c("rsq", "n"),
          digits = 3)
````

````{r}

#### trying models for different distances

summary(PrecDist5000Fire_km)
max(PrecDist5000Fire_km, na.rm = T)
values<- c()
coefs <-c()

for(i in 5:max(PrecDist5000Fire_km, na.rm = T) ){
  data = filter(votefire2_complete, PrecDist5000Fire_km <= i)
  model<-  lm( turnout2020 ~ PrecDist5000Fire_km  + WHPMean + pop + urban+ crit_work +
    remindex + proximity + exposure + turnout2016, data = data)
  
    se <-unlist(starprep(model, clusters = data$FullPrc, se_type = "stata"))
    values<- tibble(distance = i, coef = coef(model)[2], se= se[2])
    coefs<-rbind(coefs, values)
}


ggplot(coefs, aes(x= distance, y = coef))+
  geom_line() 

values2<- c()
coefs2 <-c()

for(i in 5:max(PrecDist5000Fire_km, na.rm = T) ){
  data <- votefire2_complete
  data$fire<-ifelse(data$PrecDist5000Fire_km <= i, 1, 0)
  model<-  lm( turnout2020 ~ fire  + WHPMean + pop + urban+ crit_work +
    remindex + proximity + exposure + turnout2016, data = data)
  
    se <-unlist(starprep(model, clusters = data$FullPrc, se_type = "stata"))
    values2<- tibble(distance = i, coef = coef(model)[2], se= se[2])
    coefs2<-rbind(coefs, values)
}


ggplot(coefs2, aes(x= distance, y = coef))+
  geom_line(color="red") 
`````


